/*
** $Id: llex.c,v 2.96.1.1 2017/04/19 17:20:42 roberto Exp $
** Lexical Analyzer
** See Copyright Notice in lua.h
*/

#define llex_c
#define LUA_CORE

#include "lprefix.h"

#include <locale.h>
#include <string.h>

#include "lua.h"

#include "lctype.h"
#include "ldebug.h"
#include "ldo.h"
#include "lgc.h"
#include "llex.h"
#include "lobject.h"
#include "lparser.h"
#include "lstate.h"
#include "lstring.h"
#include "ltable.h"
#include "lzio.h"

/* advance the input: fetch the next char with 'zgetc' into 'ls->current' */
#define next(ls) (ls->current = zgetc(ls->z))

/* true when the current char is a line-break char ('\n' or '\r') */
#define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r')

/*
** ORDER RESERVED
** Printable text of every multi-char token. 'luaX_token2str' indexes this
** table with 'token - FIRST_RESERVED', and 'luaX_init' registers the first
** NUM_RESERVED entries as reserved words, so the order of the entries is
** significant.
*/
static const char* const luaX_tokens[] = {"break", "do",    "else", "elif",   "end",      "false",     "for",    "function", "goto",  "if",
                                          "in",    "local", "nil",  "repeat", "continue", "return",    "then",   "true",     "until", "while",
                                          "//",    "..",    "...",  "==",     ">=",       "<=",        "!=",     "&&",       "||",    "**",
                                          "<<",    ">>",    "::",   "<eof>",  "<number>", "<integer>", "<name>", "<string>"};

/* append the current char to the token buffer, then advance the input */
#define save_and_next(ls) (save(ls, ls->current), next(ls))

/* forward declaration: 'save' calls it to report an over-long token */
static l_noret lexerror(LexState* ls, const char* msg, int token);

/*
** Appends char 'c' to the token buffer. When the buffer is full its size
** is doubled first; if it is already at least MAX_SIZE / 2 a "lexical
** element too long" error is raised instead.
*/
static void save(LexState* ls, int c)
{
    Mbuffer* buf = ls->buff;
    const size_t capacity = luaZ_sizebuffer(buf);
    if (luaZ_bufflen(buf) + 1 > capacity)
    { /* no room for one more char: grow */
        size_t newsize;
        if (capacity >= MAX_SIZE / 2)
            lexerror(ls, "lexical element too long", 0);
        newsize = capacity * 2;
        luaZ_resizebuffer(ls->L, buf, newsize);
    }
    buf->buffer[luaZ_bufflen(buf)++] = cast(char, c);
}

/*
** Creates the environment-name string and one string per reserved word,
** fixes each of them so that they are never collected, and tags every
** reserved word with its 1-based position in 'luaX_tokens'.
*/
void luaX_init(lua_State* L)
{
    int idx = 0;
    TString* envname = luaS_newliteral(L, LUA_ENV); /* create env name */
    luaC_fix(L, obj2gco(envname));                  /* never collect this name */
    while (idx < NUM_RESERVED)
    {
        TString* word = luaS_new(L, luaX_tokens[idx]);
        luaC_fix(L, obj2gco(word)); /* reserved words are never collected */
        idx++;
        word->extra = cast_byte(idx); /* mark as reserved word (index + 1) */
    }
}

/*
** Returns a printable form of 'token'. Single-byte tokens and fixed-format
** tokens (symbols and reserved words) are formatted between quotes with
** 'luaO_pushfstring'; tokens from TK_EOS onwards return their generic
** description straight from 'luaX_tokens'.
*/
const char* luaX_token2str(LexState* ls, int token)
{
    const char* text;
    if (token < FIRST_RESERVED)
    { /* single-byte symbol: show the char itself */
        lua_assert(token == cast_uchar(token));
        return luaO_pushfstring(ls->L, "'%c'", token);
    }
    text = luaX_tokens[token - FIRST_RESERVED];
    if (token >= TK_EOS) /* names, strings, and numerals */
        return text;
    /* fixed format (symbols and reserved words) */
    return luaO_pushfstring(ls->L, "'%s'", text);
}

/*
** Returns the text used to describe 'token' in an error message. For
** names, strings and numerals this is the current buffer content (after
** terminating it with '\0') between quotes; any other token is described
** by 'luaX_token2str'.
*/
static const char* txtToken(LexState* ls, int token)
{
    if (token == TK_NAME || token == TK_STRING || token == TK_FLT || token == TK_INT)
    {
        save(ls, '\0'); /* terminate the buffer before using it as a C string */
        return luaO_pushfstring(ls->L, "'%s'", luaZ_buffer(ls->buff));
    }
    return luaX_token2str(ls, token);
}

/*
** Raises a syntax error and never returns. 'msg' is passed through
** 'luaG_addinfo' together with the chunk source and the current line
** number; when 'token' is non-zero, " near <token text>" is appended to
** the message before LUA_ERRSYNTAX is thrown.
*/
static l_noret lexerror(LexState* ls, const char* msg, int token)
{
    const char* located = luaG_addinfo(ls->L, msg, ls->source, ls->linenumber);
    if (token != 0)
        luaO_pushfstring(ls->L, "%s near %s", located, txtToken(ls, token));
    luaD_throw(ls->L, LUA_ERRSYNTAX);
}

/*
** Raises a syntax error with message 'msg', reported near the current
** token ('ls->t.token'). Does not return.
*/
l_noret luaX_syntaxerror(LexState* ls, const char* msg)
{
    int current_token = ls->t.token;
    lexerror(ls, msg, current_token);
}

/*
** Creates a new string and anchors it in the scanner's table ('ls->h') so
** that it will not be collected until the end of the compilation (by that
** time it should be anchored somewhere else). If an equal string is
** already a key of that table, the previously stored key is returned.
*/
TString* luaX_newstring(LexState* ls, const char* str, size_t l)
{
    lua_State* L = ls->L;
    TValue* slot;                          /* table entry for 'str' */
    TString* ts = luaS_newlstr(L, str, l); /* create new string */
    setsvalue2s(L, L->top++, ts);          /* temporarily anchor it in stack */
    slot = luaH_set(L, ls->h, L->top - 1);
    if (!ttisnil(slot))
    { /* string already present: re-use the key previously stored */
        ts = tsvalue(keyfromval(slot));
    }
    else
    { /* not in use yet */
        /* boolean value does not need GC barrier;
           table has no metatable, so it does not need to invalidate cache */
        setbvalue(slot, 1); /* t[string] = true */
        luaC_checkGC(L);
    }
    L->top--; /* remove string from stack */
    return ts;
}

/*
** Skips one newline sequence (any of \n, \r, \n\r, or \r\n) and
** increments the line number. Raises an error if the line counter
** reaches MAX_INT.
*/
static void inclinenumber(LexState* ls)
{
    int first = ls->current;
    lua_assert(currIsNewline(ls));
    next(ls); /* skip '\n' or '\r' */
    if (ls->current != first && currIsNewline(ls))
        next(ls); /* second half of '\n\r' or '\r\n' */
    ls->linenumber++;
    if (ls->linenumber >= MAX_INT)
        lexerror(ls, "chunk has too many lines", 0);
}

/*
** Prepares 'ls' to scan the stream 'z' of chunk 'source'. 'firstchar' is
** installed as the current char. Line counters start at 1, there is no
** current function state and no look-ahead token, and the token buffer is
** sized to LUA_MINBUFFER.
*/
void luaX_setinput(lua_State* L, LexState* ls, ZIO* z, TString* source, int firstchar)
{
    /* where the input comes from */
    ls->L = L;
    ls->z = z;
    ls->source = source;
    ls->current = firstchar;

    /* token state */
    ls->t.token = 0;
    ls->lookahead.token = TK_EOS; /* no look-ahead token */

    /* parser link and position */
    ls->fs = NULL;
    ls->linenumber = 1;
    ls->lastline = 1;

    ls->envn = luaS_newliteral(L, LUA_ENV);            /* get env name */
    luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */
}

/*
** =======================================================
** LEXICAL ANALYZER
** =======================================================
*/

/*
** If the current char is 'c', skips it (without saving it) and returns 1;
** otherwise leaves the input untouched and returns 0.
*/
static int check_next1(LexState* ls, int c)
{
    if (ls->current != c)
        return 0;
    next(ls);
    return 1;
}

/*
** Checks whether the current char is one of the two chars in 'set'. If it
** is, saves it in the buffer, advances the input and returns 1; otherwise
** returns 0.
*/
static int check_next2(LexState* ls, const char* set)
{
    int matched;
    lua_assert(set[2] == '\0');
    matched = (ls->current == set[0] || ls->current == set[1]);
    if (matched)
        save_and_next(ls);
    return matched;
}

/* LUA_NUMBER */
/*
** Reads a numeral into the buffer and converts it with 'luaO_str2num'.
** The scan is deliberately liberal (any mix of hexadecimal digits, dots
** and exponent marks); 'luaO_str2num' rejects ill-formed numerals, which
** are reported as "malformed number". Returns TK_INT with the value in
** 'seminfo->i', or TK_FLT with the value in 'seminfo->r'.
*/
static int read_numeral(LexState* ls, SemInfo* seminfo)
{
    TValue value;
    const char* expmarks = "Ee";
    int lead = ls->current;
    lua_assert(lisdigit(ls->current));
    save_and_next(ls);
    if (lead == '0' && check_next2(ls, "xX")) /* hexadecimal? */
        expmarks = "Pp";
    for (;;)
    {
        if (check_next2(ls, expmarks)) /* exponent part? */
            check_next2(ls, "-+");     /* optional exponent sign */
        if (!lisxdigit(ls->current) && ls->current != '.')
            break; /* neither a digit nor a dot: numeral is over */
        save_and_next(ls);
    }
    save(ls, '\0');
    if (luaO_str2num(luaZ_buffer(ls->buff), &value) == 0) /* format error? */
        lexerror(ls, "malformed number", TK_FLT);
    if (!ttisinteger(&value))
    {
        lua_assert(ttisfloat(&value));
        seminfo->r = fltvalue(&value);
        return TK_FLT;
    }
    seminfo->i = ivalue(&value);
    return TK_INT;
}

/*
** Reads a sequence '[=*[' or ']=*]', leaving the last bracket as the
** current char. Every char read is saved in the buffer.
** Returns the number of '='s + 2 if the sequence is well formed;
** otherwise returns 1 if there were no '='s, or 0 if there were some
** (an unfinished '[==...').
*/
static size_t skip_sep(LexState* ls)
{
    size_t level = 0;
    int bracket = ls->current;
    lua_assert(bracket == '[' || bracket == ']');
    save_and_next(ls);
    for (; ls->current == '='; level++)
        save_and_next(ls);
    if (ls->current == bracket)
        return level + 2; /* well-formed sequence */
    if (level == 0)
        return 1; /* just a single bracket */
    return 0;     /* some '='s but no matching bracket */
}

/*
** Reads the body of a long bracket ('[[...]]', '[==[...]==]', ...).
** On entry the current char is the second '[' of the opening sequence and
** 'sep' is the value 'skip_sep' returned for that sequence; the body ends
** at the first closing sequence for which 'skip_sep' returns the same
** value. With a non-NULL 'seminfo' the content (without the delimiters)
** is stored in 'seminfo->ts'; with a NULL 'seminfo' the text is treated as
** a comment and its chars are not kept. Reaching the end of the input
** first raises an "unfinished long string/comment" error that names the
** starting line.
*/
static void read_long_string(LexState* ls, SemInfo* seminfo, size_t sep)
{
    int line = ls->linenumber; /* initial line (for error message) */
    save_and_next(ls);         /* skip 2nd '[' */
    if (currIsNewline(ls))     /* string starts with a newline? */
        inclinenumber(ls);     /* skip it */
    for (;;)
    {
        switch (ls->current)
        {
        case EOZ:
        { /* error */
            const char* what = (seminfo ? "string" : "comment");
            const char* msg = luaO_pushfstring(ls->L, "unfinished long %s (starting at line %d)", what, line);
            lexerror(ls, msg, TK_EOS);
            break; /* to avoid warnings */
        }
        case ']':
        {
            /* 'skip_sep' saves the chars it reads, so a non-matching
               ']=*' sequence simply stays in the buffer as content */
            if (skip_sep(ls) == sep)
            {
                save_and_next(ls); /* skip 2nd ']' */
                goto endloop;
            }
            break;
        }
        case '\n':
        case '\r':
        {
            /* any newline sequence is stored as a single '\n' */
            save(ls, '\n');
            inclinenumber(ls);
            if (!seminfo)
                luaZ_resetbuffer(ls->buff); /* avoid wasting space */
            break;
        }
        default:
        {
            if (seminfo)
                save_and_next(ls);
            else
                next(ls);
        }
        }
    }
endloop:
    /* drop the 'sep' delimiter chars saved at each end of the buffer */
    if (seminfo)
        seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + sep, luaZ_bufflen(ls->buff) - 2 * sep);
}

/*
** Checks condition 'c' while reading an escape sequence. When 'c' is
** false, raises error 'msg' for a TK_STRING token; before doing so, the
** current char (unless it is the end of the input) is added to the buffer
** so that it shows up in the error message.
*/
static void esccheck(LexState* ls, int c, const char* msg)
{
    if (c)
        return; /* condition holds: nothing to report */
    if (ls->current != EOZ)
        save_and_next(ls); /* add current to buffer for error message */
    lexerror(ls, msg, TK_STRING);
}

/*
** Saves the current char, advances, and requires the new current char to
** be a hexadecimal digit (raising an error otherwise). Returns the value
** of that digit; the digit itself is left as the current char.
*/
static int gethexa(LexState* ls)
{
    int is_hex;
    save_and_next(ls);
    is_hex = lisxdigit(ls->current);
    esccheck(ls, is_hex, "hexadecimal digit expected");
    return luaO_hexavalue(ls->current);
}

/*
** Reads the two hexadecimal digits of a '\xXX' escape and returns the
** resulting value. Each 'gethexa' call saves one char in the buffer, so
** two chars are removed from it afterwards.
*/
static int readhexaesc(LexState* ls)
{
    int high = gethexa(ls);
    int low = gethexa(ls);
    luaZ_buffremove(ls->buff, 2); /* remove saved chars from buffer */
    return (high << 4) + low;
}

/*
** Reads a '\u{XXX}' escape (the current char is the 'u') and returns the
** code point it denotes. Requires the braces and at least one hexadecimal
** digit, and rejects values above 0x10FFFF. All chars saved in the buffer
** along the way are removed before returning.
*/
static unsigned long readutf8esc(LexState* ls)
{
    unsigned long code;
    int saved = 4;     /* chars to be removed: '\', 'u', '{', and first digit */
    save_and_next(ls); /* skip 'u' */
    esccheck(ls, ls->current == '{', "missing '{'");
    code = gethexa(ls); /* must have at least one digit */
    for (;;)
    {
        save_and_next(ls); /* keep the digit just handled, look at the next char */
        if (!lisxdigit(ls->current))
            break;
        saved++;
        code = (code << 4) + luaO_hexavalue(ls->current);
        esccheck(ls, code <= 0x10FFFF, "UTF-8 value too large");
    }
    esccheck(ls, ls->current == '}', "missing '}'");
    next(ls);                         /* skip '}' */
    luaZ_buffremove(ls->buff, saved); /* remove saved chars from buffer */
    return code;
}

/*
** Reads a '\u{XXX}' escape and appends its encoding, as produced by
** 'luaO_utf8esc', to the token buffer. The 'n' bytes reported by
** 'luaO_utf8esc' are taken from the end of the local array.
*/
static void utf8esc(LexState* ls)
{
    char buff[UTF8BUFFSZ];
    int len = luaO_utf8esc(buff, readutf8esc(ls));
    int pos;
    for (pos = UTF8BUFFSZ - len; pos < UTF8BUFFSZ; pos++) /* add 'buff' to string */
        save(ls, buff[pos]);
}

/*
** Reads a decimal escape '\ddd' (up to three digits, starting at the
** current char) and returns its value. Raises an error if the value is
** larger than UCHAR_MAX. The digits saved while reading are removed from
** the buffer before returning.
*/
static int readdecesc(LexState* ls)
{
    int ndigits = 0;
    int value = 0; /* result accumulator */
    while (ndigits < 3 && lisdigit(ls->current))
    { /* read up to 3 digits */
        value = 10 * value + ls->current - '0';
        save_and_next(ls);
        ndigits++;
    }
    esccheck(ls, value <= UCHAR_MAX, "decimal escape too large");
    luaZ_buffremove(ls->buff, ndigits); /* remove read digits from buffer */
    return value;
}

/*
** Reads a short literal string delimited by 'del' (the quote char that is
** the current char on entry), decoding escape sequences as it goes, and
** stores the resulting string (without the delimiters) in 'seminfo->ts'.
** The opening delimiter and the '\\' of a pending escape are kept in the
** buffer only so that error messages can show them. Raises "unfinished
** string" on end of input or on an unescaped line break.
**
** The three labels in the escape handling differ in how much work is
** left: 'read_save' still has to skip the escape char, 'only_save' has
** already consumed its input and just replaces the saved '\\' with 'c',
** and 'no_save' has nothing left to store.
*/
static void read_string(LexState* ls, int del, SemInfo* seminfo)
{
    save_and_next(ls); /* keep delimiter (for error messages) */
    while (ls->current != del)
    {
        switch (ls->current)
        {
        case EOZ:
            lexerror(ls, "unfinished string", TK_EOS);
            break; /* to avoid warnings */
        case '\n':
        case '\r':
            lexerror(ls, "unfinished string", TK_STRING);
            break; /* to avoid warnings */
        case '\\':
        {                      /* escape sequences */
            int c;             /* final character to be saved */
            save_and_next(ls); /* keep '\\' for error messages */
            switch (ls->current)
            {
            case 'a':
                c = '\a';
                goto read_save;
            case 'b':
                c = '\b';
                goto read_save;
            case 'f':
                c = '\f';
                goto read_save;
            case 'n':
                c = '\n';
                goto read_save;
            case 'r':
                c = '\r';
                goto read_save;
            case 't':
                c = '\t';
                goto read_save;
            case 'v':
                c = '\v';
                goto read_save;
            case 'x':
                c = readhexaesc(ls);
                goto read_save;
            case 'u':
                utf8esc(ls); /* saves the encoded bytes itself */
                goto no_save;
            case '\n':
            case '\r':
                /* escaped line break: stored as a single '\n' */
                inclinenumber(ls);
                c = '\n';
                goto only_save;
            case '\\':
            case '\"':
            case '\'':
                c = ls->current;
                goto read_save;
            case EOZ:
                goto no_save; /* will raise an error next loop */
            case 'z':
            {                                 /* zap following span of spaces */
                luaZ_buffremove(ls->buff, 1); /* remove '\\' */
                next(ls);                     /* skip the 'z' */
                while (lisspace(ls->current))
                {
                    if (currIsNewline(ls))
                        inclinenumber(ls);
                    else
                        next(ls);
                }
                goto no_save;
            }
            default:
            {
                esccheck(ls, lisdigit(ls->current), "invalid escape sequence");
                c = readdecesc(ls); /* digital escape '\ddd' */
                goto only_save;
            }
            }
        read_save:
            next(ls); /* skip the escape char (or last hexadecimal digit) */
            /* go through */
        only_save:
            luaZ_buffremove(ls->buff, 1); /* remove '\\' */
            save(ls, c);
            /* go through */
        no_save:
            break;
        }
        default:
            save_and_next(ls);
        }
    }
    save_and_next(ls); /* skip delimiter */
    /* '+ 1' and '- 2' drop the opening and closing delimiters */
    seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1, luaZ_bufflen(ls->buff) - 2);
}

/*
** Scans and returns the next token of the input, skipping white space,
** line breaks and '#' comments on the way.
**
** Single-char tokens are returned as their own char code, multi-char
** operators and reserved words as their TK_* code, and the end of the
** input as TK_EOS. For names, reserved words and strings the text is
** stored in 'seminfo->ts'; for numerals the value is stored in
** 'seminfo->i' (TK_INT) or 'seminfo->r' (TK_FLT).
*/
static int llex(LexState* ls, SemInfo* seminfo)
{
    luaZ_resetbuffer(ls->buff);
    for (;;)
    {
        switch (ls->current)
        {
        case '\n':
        case '\r':
        { /* line breaks */
            inclinenumber(ls);
            break;
        }
        case ' ':
        case '\f':
        case '\t':
        case '\v':
        { /* spaces */
            next(ls);
            break;
        }
        case '#':
        { /* '#' starts a comment that runs to the end of the line */
            next(ls);
            while (!currIsNewline(ls) && ls->current != EOZ)
                next(ls); /* skip until end of line (or end of file) */
            break;
        }
        case '&':
        { /* '&' or '&&' */
            next(ls);
            if (check_next1(ls, '&'))
                return TK_AND;
            else
                return '&';
        }
        case '|':
        { /* '|' or '||' */
            next(ls);
            if (check_next1(ls, '|'))
                return TK_OR;
            else
                return '|'; /* a lone '|' must be reported as '|', not '&' */
        }
        case '*':
        { /* '*' or '**' */
            next(ls);
            if (check_next1(ls, '*'))
                return TK_POW;
            else
                return '*';
        }
        case '=':
        { /* '=' or '==' */
            next(ls);
            if (check_next1(ls, '='))
                return TK_EQ;
            else
                return '=';
        }
        case '<':
        { /* '<', '<=' or '<<' */
            next(ls);
            if (check_next1(ls, '='))
                return TK_LE;
            else if (check_next1(ls, '<'))
                return TK_SHL;
            else
                return '<';
        }
        case '>':
        { /* '>', '>=' or '>>' */
            next(ls);
            if (check_next1(ls, '='))
                return TK_GE;
            else if (check_next1(ls, '>'))
                return TK_SHR;
            else
                return '>';
        }
        case '/':
        { /* '/' or '//' */
            next(ls);
            if (check_next1(ls, '/'))
                return TK_IDIV;
            else
                return '/';
        }
        case '!':
        { /* '!' or '!=' */
            next(ls);
            if (check_next1(ls, '='))
                return TK_NE;
            else
                return '!';
        }
        case ':':
        { /* ':' or '::' */
            next(ls);
            if (check_next1(ls, ':'))
                return TK_DBCOLON;
            else
                return ':';
        }
        case '"':
        case '\'':
        { /* short literal strings */
            read_string(ls, ls->current, seminfo);
            return TK_STRING;
        }
        case '.':
        { /* '.', '..', '...', or number */
            save_and_next(ls); /* keep the '.' in case a numeral follows */
            if (check_next1(ls, '.'))
            {
                if (check_next1(ls, '.'))
                    return TK_DOTS; /* '...' */
                else
                    return TK_CONCAT; /* '..' */
            }
            else if (!lisdigit(ls->current))
                return '.';
            else
                return read_numeral(ls, seminfo);
        }
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        {
            return read_numeral(ls, seminfo);
        }
        case EOZ:
        {
            return TK_EOS;
        }
        default:
        {
            if (lislalpha(ls->current))
            { /* identifier or reserved word? */
                TString* ts;
                do
                {
                    save_and_next(ls);
                } while (lislalnum(ls->current));
                ts = luaX_newstring(ls, luaZ_buffer(ls->buff), luaZ_bufflen(ls->buff));
                seminfo->ts = ts;
                if (isreserved(ts)) /* reserved word? */
                    return ts->extra - 1 + FIRST_RESERVED;
                else
                {
                    return TK_NAME;
                }
            }
            else
            { /* single-char tokens (+ - / ...) */
                int c = ls->current;
                next(ls);
                return c;
            }
        }
        }
    }
}

/*
** Advances to the next token: records the current line in 'ls->lastline'
** and then fills 'ls->t' either from the pending look-ahead token, if
** there is one, or by scanning a fresh token from the input.
*/
void luaX_next(LexState* ls)
{
    ls->lastline = ls->linenumber;
    if (ls->lookahead.token == TK_EOS)
    { /* no look-ahead token: read next token */
        ls->t.token = llex(ls, &ls->t.seminfo);
        return;
    }
    ls->t = ls->lookahead;        /* use the look-ahead token */
    ls->lookahead.token = TK_EOS; /* and discharge it */
}

/*
** Scans one token ahead of the current one, stores it in 'ls->lookahead'
** and returns its code. There must be no pending look-ahead token.
*/
int luaX_lookahead(LexState* ls)
{
    int tok;
    lua_assert(ls->lookahead.token == TK_EOS);
    tok = llex(ls, &ls->lookahead.seminfo);
    ls->lookahead.token = tok;
    return tok;
}
